#This script is used to generate Henderson.Rdata
rm(list=ls())
library(fields); library(maps); library(ncdf4);library(abind)
library(geosphere);library(mapdata)
setwd('/Volumes/Disk2/JJA_ozone_MAM/Harvard_Dataverse')

#----- load functions -----------------------
source("./Function/get_geo.R")
source("./Function/get_met.R")
source('./Function/read_detrend.R')
source('./Function/eof.mca.R')
source('./Function/cov4gappy.R')
source('./Function/Henderson.R')
source('./Function/filled.contour3.R')

mov.detrend=Henderson_detrend

#======= Part 1: read data ====================
load("./Data/regrid_JJA-yearly-O3_1980-2013.Rdata")
raw_O3=yearly_O3
for (i in 1:28){
	for (j in 1:11){
		ap= mov.detrend(yearly_O3[i,j,])
		yearly_O3[i,j,]=ap
	}
}
ind1=(lon.frame>=-100 & lon.frame<=-65)
ind2=(lat.frame>=32.5 & lat.frame<=47.5)
US.mask=!is.na(apply(yearly_O3,c(1,2),mean,na.rm=TRUE))
US.area=cal.area(lon.frame,lat.frame)
US.area[!US.mask]=NA
East.O3=cal.area_mean(yearly_O3,US.area,ind1,ind2)

#--------read SST --------------------
xlimit=c(200,340)
ylimit=c(0,60)
met=read_SST('sst.mnmean.nc',3,5,xlim=xlimit,ylim=ylimit)
lon.SST=met$lon
lat.SST=met$lat
raw_SST=met$data[,,33:66]
month_SST=array(NA,dim(met$data[,,33:66]))
for (i in 1:dim(month_SST)[1]){
	for (j in 1:dim(month_SST)[2]){
		month_SST[i,j,]= mov.detrend(met$data[i,j,33:66])
	}
}

#-------- read SLP ------------------------------------
xlimit=c(180,340)
ylimit=c(0,60)
met= read_surface('slp.mon.mean.nc',3,5,xlim=xlimit,ylim=ylimit)
lon.SLP=met$lon
lat.SLP=met$lat
raw_SLP=met$data[,,33:66]
month_SLP=array(NA,dim(met$data[,,33:66]))
for (i in 1:dim(month_SLP)[1]){
	for (j in 1:dim(month_SLP)[2]){
		month_SLP[i,j,]= mov.detrend(met$data[i,j,33:66])
	}
}

#============= Part 2: correlations ===================
par(mar=c(3,2,1,3))
par(mfrow=c(2,2))

#------ SST -------------
ap=find.cor2(month_SST,East.O3,p_value=1)
plot.field(ap,lon.SST-360,lat.SST,type='sign',mai=c(0.5,0.4,0.2,0.2),legend.mar=4.5,zlim=c(-0.65,0.65),xaxt=NULL,yaxt=NULL)
cr=find.Rlim(0.05,34)
clines=contourLines(lon.SST-360, lat.SST, ap,level=c(-cr, cr))
for(k in 1:length(clines)){
	lines(clines[[k]]$x,clines[[k]]$y,lwd=1,col=1,lty=2)
}

	loc=c(-145,-125,25,42)	
	arrows(x0=loc[1],y0=loc[3],x1=loc[2],code=0,col=2,lwd=2)
	arrows(x0=loc[1],y0=loc[4],x1=loc[2],code=0,col=2,lwd=2)
	arrows(x0=loc[1],y0=loc[3],y1=loc[4],code=0,col=2,lwd=2)
	arrows(x0=loc[2],y0=loc[3],y1=loc[4],code=0,col=2,lwd=2)
	ind11=(lon.SST>=(loc[1]+360) & lon.SST<=(loc[2]+360))
	ind12=(lat.SST>=loc[3] & lat.SST<=loc[4])

	loc=c(-70,-40,7,22)	
	arrows(x0=loc[1],y0=loc[3],x1=loc[2],code=0,col=1,lwd=2)
	arrows(x0=loc[1],y0=loc[4],x1=loc[2],code=0,col=1,lwd=2)
	arrows(x0=loc[1],y0=loc[3],y1=loc[4],code=0,col=1,lwd=2)
	arrows(x0=loc[2],y0=loc[3],y1=loc[4],code=0,col=1,lwd=2)
	ind21=(lon.SST>=(loc[1]+360) & lon.SST<=(loc[2]+360))
	ind22=(lat.SST>=loc[3] & lat.SST<=loc[4])

SST_diff0=apply(raw_SST[ind21,ind22,],c(3),mean,na.rm=TRUE)-
		apply(raw_SST[ind11,ind12,],c(3),mean,na.rm=TRUE)
mask=(abs(ap)>=cr);		
area=cal.area(lon.SST,lat.SST);#area[!mask]=NA
SST_diff0=cal.area_mean(raw_SST,area,ind21,ind22)-cal.area_mean(raw_SST,area,ind11,ind12)				
SST_diff1=Henderson_detrend(SST_diff0)
SST_diff2= mov.detrend(SST_diff0)

ap=find.cor2(yearly_O3, SST_diff1,p_value=1)
plot.field(ap[14:28,],lon.frame[14:28],lat.frame,type='sign',xaxt=NULL,yaxt=NULL,legend.mar=4.5,zlim=c(-0.75,0.75))
cr=find.Rlim(0.05,34)
ap[is.na(ap)]=0
for (i in 1:28){
	for (j in 1:11){
		if(abs(ap[i,j])> cr)points(lon.frame[i],lat.frame[j],pch=16,cex=0.5,col=1)
	}
}
SST.ind=(ap>=cr)


#-------- SLP ---------
ap=find.cor2(month_SLP,East.O3,p_value=1)
plot.field(ap,lon.SLP-360,lat.SLP,type='sign',mai=c(0.5,0.4,0.2,0.2),legend.mar=4.5,zlim=c(-0.65,0.65),xaxt=NULL,yaxt=NULL)
cr=find.Rlim(0.05,34)
clines=contourLines(lon.SLP-360, lat.SLP, ap,level=c(-cr, cr))
for(k in 1:length(clines)){
	lines(clines[[k]]$x,clines[[k]]$y,lwd=1,col=1,lty=2)
}

	# loc=c(-170,-130,12.5,32.5)
	loc=c(-165,-135,12.5,32.5)	
	arrows(x0=loc[1],y0=loc[3],x1=loc[2],code=0,col=1,lwd=2)
	arrows(x0=loc[1],y0=loc[4],x1=loc[2],code=0,col=1,lwd=2)
	arrows(x0=loc[1],y0=loc[3],y1=loc[4],code=0,col=1,lwd=2)
	arrows(x0=loc[2],y0=loc[3],y1=loc[4],code=0,col=1,lwd=2)
	ind11=(lon.SLP>=(loc[1]+360) & lon.SLP<=(loc[2]+360))
	ind12=(lat.SLP>=loc[3] & lat.SLP<=loc[4])

	loc=c(-110,-75,12,40)
	# loc=c(-110,-70,12,40)	
	arrows(x0=loc[1],y0=loc[3],x1=loc[2],code=0,col=2,lwd=2)
	arrows(x0=loc[1],y0=loc[4],x1=loc[2],code=0,col=2,lwd=2)
	arrows(x0=loc[1],y0=loc[3],y1=loc[4],code=0,col=2,lwd=2)
	arrows(x0=loc[2],y0=loc[3],y1=loc[4],code=0,col=2,lwd=2)
	ind21=(lon.SLP>=(loc[1]+360) & lon.SLP<=(loc[2]+360))
	ind22=(lat.SLP>=loc[3] & lat.SLP<=loc[4])

SLP_diff0=apply(raw_SLP[ind21,ind22,],c(3),mean,na.rm=TRUE)-
		apply(raw_SLP[ind11,ind12,],c(3),mean,na.rm=TRUE)
mask=(abs(ap)>=cr);		
area=cal.area(lon.SLP, lat.SLP);#area[!mask]=NA
SLP_diff0 =cal.area_mean(raw_SLP,area,ind21,ind22)-cal.area_mean(raw_SLP,area,ind11,ind12)					
SLP_diff1=Henderson_detrend(SLP_diff0)
SLP_diff2=mov.detrend(SLP_diff0)

ap=-find.cor2(yearly_O3, SLP_diff1,p_value=1)
plot.field(ap[14:28,],lon.frame[14:28],lat.frame,type='sign',xaxt=NULL,yaxt=NULL,legend.mar=4.5,zlim=c(-0.75,0.75))
cr=find.Rlim(0.05,34)
ap[is.na(ap)]=0
for (i in 1:28){
	for (j in 1:11){
		if(abs(ap[i,j])> cr)points(lon.frame[i],lat.frame[j],pch=16,cex=0.5,col=1)
	}
}
SLP.ind=(ap>=cr)


#======= Part 3 ==========
cross.predict=function(y,PC1,PC2){
	x=1:34
	y_pred=array(NA,c(length(y),3))
	for (t in 1:34){
		ind=(x!=t)
		train=data.frame(y=y[ind],x1=PC1[ind],x2=PC2[ind])
		test=data.frame(x1=PC1[!ind],x2=PC2[!ind])
		fit<-(lm(y~x1+x2,data=train))
		y_pred[!ind,1]=predict(fit,test)
		fit<-(lm(y~x1,data=train))
		y_pred[!ind,2]=predict(fit,test)
		fit<-(lm(y~x2,data=train))
		y_pred[!ind,3]=predict(fit,test)		
	}
	return(list(R=cor(y_pred,y),pred=y_pred))
}

PC1=SST_diff2
PC2=SLP_diff2
predict_O3=array(NA,dim(yearly_O3))
correlation=array(NA,c(28,11))
for (i in 14:28){
	print(i)
	for (j in 1:11){
		y=(yearly_O3[i,j,])
		if (sum(is.na(y))==0){
			ap=cross.predict(y,PC1,PC2)
			y1=ap$pred[,1]
			y2=ap$pred[,2]
			y3=ap$pred[,3]
			corrs=c(cor(y,y1),cor(y,y2),cor(y,y3))
			predict_O3[i,j,]=ap$pred[,which.max(corrs)]
		}
	}
}
correlation=find.cor(yearly_O3,predict_O3,plim=1)

##
par(mar=c(3,2,1,3))
par(mfrow=c(2,2))

ap=find.cor2(yearly_O3, SST_diff1,p_value=1)
  plot.field(ap[14:26,],lon.frame[14:26],lat.frame,type='sign',legend.mar=4.5,zlim=c(-0.75,0.75), mai=c(0.2,0.2,0.2,0.2))
 cr=find.Rlim(0.05,34)
 ap[is.na(ap)]=0
 for (i in 1:28){
	 for (j in 1:11){
		 if(abs(ap[i,j])> cr)points(lon.frame[i],lat.frame[j],pch=16,cex=0.5,col=1)
	 }
 }
 title(expression(paste('(a) Corr ozone vs. ',Delta,'SST' ,sep='')),cex.main=1.1)
 text('r',x=par("usr")[2]+1.5,y=par("usr")[4],srt=0, adj = 0,xpd=TRUE,cex=1.3,font.main=1)

 ap=-find.cor2(yearly_O3, SLP_diff1,p_value=1)
 plot.field(ap[14:26,],lon.frame[14:26],lat.frame,type='sign',legend.mar=4.5,zlim=c(-0.75,0.75),mai=c(0.2,0.2,0.2,0.2))
 cr=find.Rlim(0.05,34)
 ap[is.na(ap)]=0
 for (i in 1:28){
	 for (j in 1:11){
		 if(abs(ap[i,j])> cr)points(lon.frame[i],lat.frame[j],pch=16,cex=0.5,col=1)
	 }
 }
 title(expression(paste('(b) Corr ozone vs. ',Delta,'SLP' ,sep='')),cex.main=1.1)
 text('r',x=par("usr")[2]+1.5,y=par("usr")[4],srt=0, adj = 0,xpd=TRUE,cex=1.3,font.main=1)

space.ind=!SST.ind & !SLP.ind
correlation[space.ind]=NA
plot.field((correlation[14:26,]),lon.frame[14:26],lat.frame,type='sign',legend.mar=4.5,zlim=c(-0.75,0.75),mai=c(0.2,0.2,0.2,0.2))
title('(c) Cross-validated Correlation',cex.main=1.1,font.main=1)
text('r',x=par("usr")[2]+1.5,y=par("usr")[4],srt=0, adj = 0,xpd=TRUE,cex=1.3,font.main=1)

raw_O3_2=raw_O3
predict_O3_2= predict_O3
yearly_O3_2=yearly_O3
for(i in 1:28){
	for(j in 1:11){
		if(space.ind[i,j]) {
			raw_O3_2[i,j,]=NA	
			predict_O3_2[i,j,]=NA
			yearly_O3_2[i,j,]=NA
		}
	}
}

mai=c(0.3, 0.5, 0.2, 0.1)
mgp=c(1.8, 0.5, 0)
tcl=-0.3
ps=15
par(mai=mai, mgp=mgp, tcl=tcl, ps=ps)
ind1=(lon.frame>=-100 & lon.frame<=-65)
ind2=(lat.frame>=32.5 & lat.frame<=50)
y1=apply(yearly_O3[ind1,ind2,],c(3),mean,na.rm=TRUE)
y2=apply(predict_O3[ind1,ind2,],c(3),mean,na.rm=TRUE)
R=round(cor((y1),(y2)),digits=2)
plot(1980:2013,(y1),pch=16,col=1,type='o',xlab='',ylab='ozone anomaly(ppbv)',ylim=c(-10,10))
lines(1980:2013,(y2),pch=16,type='o',col=4)
text(paste('r(MA) =',R,sep=''),x=2007,y=7,col=4,cex=0.9)
legend(x=1995,y=-6,c('obs','prediction'),col=c(1,4),lty=1,pch=16,bty="n",text.col=c(1,4))
title('(d) Cross-validated Correlation in East',cex.main=0.9,cex.main=0.95,font.main=1)


save(yearly_O3, space.ind ,PC1,PC2, predict_O3,lon.frame,lat.frame,file="./Figures/Figure 2/Henderson.Rdata")
